R Sienna example:

rm(list = ls())

fpackage.check <- function(packages) {
    lapply(packages, FUN = function(x) {
        if (!require(x, character.only = TRUE)) {
            install.packages(x, dependencies = TRUE)
            library(x, character.only = TRUE)
        }
    })
}

fsave <- function(x, file = NULL, location = "./data/processed/") {
    ifelse(!dir.exists("data"), dir.create("data"), FALSE)
    ifelse(!dir.exists("data/processed"), dir.create("data/processed"), FALSE)
    if (is.null(file))
        file = deparse(substitute(x))
    datename <- substr(gsub("[:-]", "", Sys.time()), 1, 8)
    totalname <- paste(location, datename, file, ".rda", sep = "")
    save(x, file = totalname)  #need to fix if file is reloaded as input name, not as x. 
}

fload <- function(filename) {
    load(filename)
    get(ls()[ls() != "filename"])
}

fshowdf <- function(x, ...) {
    knitr::kable(x, digits = 2, "html", ...) %>%
        kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>%
        kableExtra::scroll_box(width = "100%", height = "300px")
}

colorize <- function(x, color) {
    sprintf("<span style='color: %s;'>%s</span>", color, x)
}
fpackage.check(packages)
[[1]]
NULL

NEED DATA - use build in datasets from RSIENNA ?s501 Is adjacency matrix for network type 1. Goal: Follow steps in 7.1 for different data:

Step 1. Define data

Dependent variable: ties

s502 #network at timepoint 2
   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26 V27 V28 V29 V30 V31 V32 V33 V34
1   0  0  0  0  0  0  0  0  0   1   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
2   0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
3   0  0  0  1  0  0  0  0  1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
4   0  0  1  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
5   0  0  0  1  0  0  0  0  0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
6   0  0  0  0  0  0  0  1  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
7   0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
8   0  0  0  0  0  1  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
9   0  0  1  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
10  1  0  0  0  0  0  0  0  0   0   1   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0
11  1  0  0  0  0  0  0  0  0   1   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   1   0   0   0   1   0   0   0   0
12  0  0  0  0  0  0  0  0  0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
13  0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
14  1  0  0  0  0  0  0  0  0   1   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
15  0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
16  0  0  0  0  0  0  0  0  0   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
17  0  0  0  1  1  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0   0   0   0   1   0   0
18  0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
19  0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   1   0   0   0   0
20  0  0  0  0  0  0  0  0  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   V35 V36 V37 V38 V39 V40 V41 V42 V43 V44 V45 V46 V47 V48 V49 V50
1    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
2    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
3    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
4    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
5    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
6    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
7    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
8    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
9    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
10   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
11   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
12   0   0   0   0   0   0   0   1   0   1   0   0   0   0   0   0
13   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
14   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
15   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0
16   0   0   0   0   0   0   0   1   0   0   0   0   0   0   0   0
17   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
18   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
19   0   1   0   0   0   0   1   0   0   0   0   0   0   0   0   0
20   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
 [ reached 'max' / getOption("max.print") -- omitted 30 rows ]

Inspect networks. Remove NAs, make sure diagonal. Need to see binary (not weighted)

Put networks in an array

c(dim(s501),2)
[1] 50 50  2

could replace this with data set from last week

Define dependent - and independent - variable

 # don't need wave 2 - just modeling at independent variable, influencing all ministeps between time 1 and time 2 
alcohol 
 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 
 3  2  2  2  3  4  4  4  2  4  5  5  3  3  4  4  2  4  3  2  1  3  4  3  3  4  2  2  3  1  4  4  3  2  3  4  2  3  2  1  4  4  2  5 
45 46 47 48 49 50 
 2  2  2  2  1  1 

Now combine into dataset - if use Rsiena, need this.

myeff
  effectName               include fix   test  initialValue parm
1 basic rate parameter net TRUE    FALSE FALSE    4.69604   0   
2 outdegree (density)      TRUE    FALSE FALSE   -1.48852   0   
3 reciprocity              TRUE    FALSE FALSE    0.00000   0   

Now can look at myeff - look at starting values - has density of two networks, reciprocity

Want much more statistics for model, though!!

Step 2:

Can see all effects that could include in the dataset. Which is a lot. And this is a simple dataset. and yeah, its huge. Also can see that the type depends on weight function (evaluation (), endowment (), and creation ())

Example. Jochem wants to make tie with Jos. Jochem’s consideration to make a tie with Jos could be different than consideration to maintain a tie, could be different that decistions to break a tie. Can’t estimate all three. Evalution: mechanisms the same for making as breaking, endowment = maintaining, creation = if there are none then what is the cost.

In this course: just use evaluation function. Assume mechanisms and functions to make/break ties are similar. include degree for evaluation. But what are all these effects? - should be able to calculate statistic for each effect for an ego in a network… Mathematical formula in chapter 12.

effectsDocumentation(myeff)
Effects documentation written to file myeff.html .

Make sure to understand out degree and reciprocity effects - statistics - will play around with this next week. Need to think of theory for which statistics are relevant. - Look at literature for statistics, think about it, play and see if effects exists - how work in reality? propose idea, write notation, see if effect exists in manual, etc. Lots of notation because everyone is looking for their own effects. - need to include more effects if what to.

Step 3: Look at intial data

print01Report(mydata)

# gives initial description of data 
# reading network variables , covariates, density measures/changes in networks, tie changes between subsequent observations... calculate how much networks changed over time. 
# dont use balance calculation 

Step 4: Add effects

myeff <- includeEffects(myeff, isolateNet, inPop, outAct)
  effectNumber effectName            shortName  include fix   test  initialValue parm
1  78          indegree - popularity inPop      TRUE    FALSE FALSE          0   0   
2 106          outdegree - activity  outAct     TRUE    FALSE FALSE          0   0   
3 149          network-isolate       isolateNet TRUE    FALSE FALSE          0   0   

outAct - look at the number of times I have and square that: especially the people that have a lot of ties will send a lot of ties. - evaluation function of tie1 0 ties, and tie1 4 ties - then at t2 people who have ties are more likely to send more ties

We know tie distributions are skewed, some people send a lot of ties and others dont.

inPop: in degree activity: people who reseive a lot of in-degrees send a lot of out-degrees

isolateNet: people without a lot of indegree nets.

Can see that starting values are 0 – hypothesis testing

Step 5: Estimation

ansM1
Estimates, standard errors and convergence t-ratios

                                Estimate   Standard   Convergence 
                                             Error      t-ratio   

Rate parameters: 
  0       Rate parameter         5.6461  ( 0.8889   )             

Other parameters: 
  1. eval outdegree (density)   -2.1186  ( 0.4326   )   -0.0337   
  2. eval reciprocity            2.4448  ( 0.2376   )   -0.0049   
  3. eval indegree - popularity  0.0724  ( 0.0638   )   -0.0378   
  4. eval outdegree - activity  -0.0645  ( 0.0676   )   -0.0412   
  5. eval network-isolate        1.7737  ( 2.2226   )   -0.0120   

Overall maximum convergence ratio:    0.0684 


Total of 2203 iteration steps.

Now have first estimated Rsiena model!

if significant: more indegrees- more likely to send later. people with more out-degrees - more likely to send less (negative). isolate more likely to be alone.

Need to think of model that makes sense in this

REVIEWING RSIENA NOW - negative degree - density is less than .5 - interpretations - Mailing list for TOM

Afternoon: play with estimating our own models Nice to do with our own data - collaboration networks. Logic of RSIENA: took actor oriented approach - works if ties are directed. if undirected network, logic is different: the actor deciding on undirected tie is difficult. Advise: for now treat as undirected - and then evaluate tie. undirected tie by reach consensus, forced into concensus, etc - reciprocity matters less now.

Now play with it

Now - referencing web scraping site

fcolnet <- function(data = scholars, university = "RU", discipline = "sociology", waves = list(c(2015,
    2018), c(2019, 2023)), type = c("first")) {

    # step 1
    demographics <- do.call(rbind.data.frame, data$demographics)
    demographics <- demographics %>%
        mutate(Universiteit1.22 = replace(Universiteit1.22, is.na(Universiteit1.22), ""), Universiteit2.22 = replace(Universiteit2.22,
            is.na(Universiteit2.22), ""), Universiteit1.24 = replace(Universiteit1.24, is.na(Universiteit1.24),
            ""), Universiteit2.24 = replace(Universiteit2.24, is.na(Universiteit2.24), ""), discipline.22 = replace(discipline.22,
            is.na(discipline.22), ""), discipline.24 = replace(discipline.24, is.na(discipline.24), ""))

    sample <- which((demographics$Universiteit1.22 %in% university | demographics$Universiteit2.22 %in%
        university | demographics$Universiteit1.24 %in% university | demographics$Universiteit2.24 %in%
        university) & (demographics$discipline.22 %in% discipline | demographics$discipline.24 %in% discipline))

    demographics_soc <- demographics[sample, ]
    scholars_sel <- lapply(scholars, "[", sample)

    # step 2
    ids <- demographics_soc$au_id
    nwaves <- length(waves)
    nets <- array(0, dim = c(nwaves, length(ids), length(ids)), dimnames = list(wave = 1:nwaves, ids,
        ids))
    dimnames(nets)

    # step 3
    df_works <- tibble(works_id = unlist(lapply(scholars_sel$work, function(l) l$id)), works_author = unlist(lapply(scholars_sel$work,
        function(l) l$author), recursive = FALSE), works_year = unlist(lapply(scholars_sel$work, function(l) l$publication_year),
        recursive = FALSE))

    df_works <- df_works[!duplicated(df_works), ]

    # step 4
    if (type == "first") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                ego <- df_works_w$works_author[i][[1]]$au_id[1]
                alters <- df_works_w$works_author[i][[1]]$au_id[-1]
                if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
                  nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
                }
            }
        }
    }

    if (type == "last") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                ego <- rev(df_works_w$works_author[i][[1]]$au_id)[1]
                alters <- rev(df_works_w$works_author[i][[1]]$au_id)[-1]
                if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
                  nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
                }
            }
        }
    }

    if (type == "all") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                egos <- df_works_w$works_author[i][[1]]$au_id
                if (sum(ids %in% egos) > 0) {
                  nets[j, which(ids %in% egos), which(ids %in% egos)] <- 1
                }
            }
        }
    }
    output <- list()
    output$data <- scholars_sel
    output$nets <- nets
    return(output)
}

Example in class from 10 October Siena dependent variable

answer_ex
Estimates, standard errors and convergence t-ratios

                                           Estimate   Standard   Convergence 
                                                        Error      t-ratio   
Network Dynamics 
   1. rate constant mynet1 rate (period 1)  5.7880  ( 0.9880   )    0.0777   
   2. rate constant mynet1 rate (period 2)  4.5365  ( 0.6468   )   -0.0510   
   3. eval outdegree (density)             -1.9196  ( 0.2519   )    0.0175   
   4. eval reciprocity                      2.7951  ( 0.1876   )   -0.0070   
   5. eval unequal mybeh                   -0.4987  ( 0.3490   )    0.0032   
   6. eval unequal smoke                   -0.1958  ( 0.1405   )   -0.0074   

Behavior Dynamics
   7. rate rate mybeh (period 1)            1.2022  ( 0.3140   )    0.0557   
   8. rate rate mybeh (period 2)            1.6729  ( 0.4611   )    0.0577   
   9. eval mybeh linear shape               0.3601  ( 0.1454   )    0.0118   
  10. eval mybeh quadratic shape           -0.1961  ( 0.0906   )    0.0197   

Overall maximum convergence ratio:    0.1356 


Total of 2926 iteration steps.
---
title: "R Notebook"
output: html_notebook
---

# R Sienna example: 


```{r}
rm(list = ls())

fpackage.check <- function(packages) {
    lapply(packages, FUN = function(x) {
        if (!require(x, character.only = TRUE)) {
            install.packages(x, dependencies = TRUE)
            library(x, character.only = TRUE)
        }
    })
}

fsave <- function(x, file = NULL, location = "./data/processed/") {
    ifelse(!dir.exists("data"), dir.create("data"), FALSE)
    ifelse(!dir.exists("data/processed"), dir.create("data/processed"), FALSE)
    if (is.null(file))
        file = deparse(substitute(x))
    datename <- substr(gsub("[:-]", "", Sys.time()), 1, 8)
    totalname <- paste(location, datename, file, ".rda", sep = "")
    save(x, file = totalname)  #need to fix if file is reloaded as input name, not as x. 
}

fload <- function(filename) {
    load(filename)
    get(ls()[ls() != "filename"])
}

fshowdf <- function(x, ...) {
    knitr::kable(x, digits = 2, "html", ...) %>%
        kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>%
        kableExtra::scroll_box(width = "100%", height = "300px")
}

colorize <- function(x, color) {
    sprintf("<span style='color: %s;'>%s</span>", color, x)
}
```



```{r}
packages = c("RSiena", "devtools", "igraph")
fpackage.check(packages)
# devtools::install_github('JochemTolsma/RsienaTwoStep', build_vignettes=TRUE)
packages = c("RsienaTwoStep")
fpackage.check(packages)
```

NEED DATA - use build in datasets from RSIENNA 
?s501
Is adjacency matrix for network type 1. Goal: Follow steps in 7.1 for different data:

# Step 1. Define data
Dependent variable: ties 


```{r}
s501 #network at timepoint 1
s502 #network at timepoint 2
```


Inspect networks. Remove NAs, make sure diagonal. Need to see binary (not weighted)
```{r}

```


Put networks in an array
```{r}

dim(s501) #says 50x50: 50 rows, 50 columns in data. 

nets <- array (data = c(s501, s502), dim = c(dim(s501), 2)) #
c(dim(s501),2)

# dimentions of array are number of columns and number of rows 
```
 could replace this with data set from last week 
 
 
Define dependent - and independent - variable
```{r}

net <- sienaDependent(nets) #dependent variable 

  # independent variable - s50a - we will take first way 
alcohol <- s50a[,1]alcohol
 # don't need wave 2 - just modeling at independent variable, influencing all ministeps between time 1 and time 2 
alcohol 
 
alcohol <- coCovar(alcohol)# at actor level - mean centered - is a time constant coVariate 
```

Now combine into dataset - if use Rsiena, need this. 
```{r}
mydata <- sienaDataCreate(net, alcohol)

myeff <- getEffects(mydata) #have a look 
myeff
```
Now can look at myeff - look at starting values - has density of two networks, reciprocity 


Want much more statistics for model, though!! 

# Step 2: 
Can see all effects that could include in the dataset. Which is a lot. And this is a simple dataset. and yeah, its huge. Also can see that the type depends on weight function (evaluation (), endowment (), and creation ())

Example. Jochem wants to make tie with Jos. Jochem's consideration to make a tie with Jos could be different than consideration to maintain a tie, could be different that decistions to break a tie. Can't estimate all three. Evalution: mechanisms the same for making as breaking, endowment = maintaining, creation = if there are none then what is the cost. 

In this course: just use evaluation function. Assume mechanisms and functions to make/break ties are similar. include degree for evaluation. But what are all these effects? 
-    should be able to calculate statistic for each effect for an ego in a network... Mathematical formula in chapter 12. 


```{r}
effectsDocumentation(myeff)
```


Make sure to understand out degree and reciprocity effects - statistics - will play around with this next week. Need to think of theory for which statistics are relevant. 
-    Look at literature for statistics, think about it, play and see if effects exists 
-    how work in reality? propose idea, write notation, see if effect exists in manual, etc. Lots of notation because everyone is looking for their own effects. 
-    need to include more effects if what to. 

```{r}

```



# Step 3: Look at intial data
-    good for deciding statistics to use. 
```{r}
print01Report(mydata)

# gives initial description of data 
# reading network variables , covariates, density measures/changes in networks, tie changes between subsequent observations... calculate how much networks changed over time. 
# dont use balance calculation 
```

# Step 4: Add effects

```{r}
myeff <- includeEffects(myeff, isolateNet, inPop, outAct)

#outAct - outdegree related activity 
```

outAct
- look at the number of times I have and square that: especially the people that have a lot of ties will send a lot of ties. 
- evaluation function of tie1 0 ties, and tie1 4 ties - then at t2 people who have ties are more likely to send more ties 

We know tie distributions are skewed, some people send a lot of ties and others dont. 

inPop: in degree activity: people who reseive a lot of in-degrees send a lot of out-degrees 

isolateNet: people without a lot of indegree nets. 



Can see that starting values are 0 -- hypothesis testing 

# Step 5: Estimation

```{r}
myAlgorithm <- sienaAlgorithmCreate(projname = "test")

# define algorithm that says how I want to estimate my model. will use defaults. 

ansM1 <- siena07(myAlgorithm, data = mydata, effects = myeff, returnDeps = TRUE)
# then run it to see 

ansM1

```
Now have first estimated Rsiena model! 


- see:negative out degree 
- 2.4 reciprocity: is important then in this model 
- indegree populaty is popular but not significant... 

if significant: more indegrees- more likely to send later. people with more out-degrees - more likely to send less (negative). isolate more likely to be alone. 


Need to think of model that makes sense in this


```{r}

```

REVIEWING RSIENA NOW
-   negative degree - density is less than .5 
-   interpretations 
-   Mailing list for TOM

-   Now: re-read chapter 2 and chapter 5 

Afternoon: play with estimating our own models 
Nice to do with our own data - collaboration networks. 
Logic of RSIENA: took actor oriented approach - works if ties are directed. if undirected network, logic is different: the actor deciding on undirected tie is difficult. Advise: for now treat as undirected - and then evaluate tie. 
undirected tie by reach consensus, forced into concensus, etc - reciprocity matters less now. 

```{r}

```

# Now play with it 

```{r}


```


```{r}
```


Now - referencing web scraping site 

```{r}
fcolnet <- function(data = scholars, university = "RU", discipline = "sociology", waves = list(c(2015,
    2018), c(2019, 2023)), type = c("first")) {

    # step 1
    demographics <- do.call(rbind.data.frame, data$demographics)
    demographics <- demographics %>%
        mutate(Universiteit1.22 = replace(Universiteit1.22, is.na(Universiteit1.22), ""), Universiteit2.22 = replace(Universiteit2.22,
            is.na(Universiteit2.22), ""), Universiteit1.24 = replace(Universiteit1.24, is.na(Universiteit1.24),
            ""), Universiteit2.24 = replace(Universiteit2.24, is.na(Universiteit2.24), ""), discipline.22 = replace(discipline.22,
            is.na(discipline.22), ""), discipline.24 = replace(discipline.24, is.na(discipline.24), ""))

    sample <- which((demographics$Universiteit1.22 %in% university | demographics$Universiteit2.22 %in%
        university | demographics$Universiteit1.24 %in% university | demographics$Universiteit2.24 %in%
        university) & (demographics$discipline.22 %in% discipline | demographics$discipline.24 %in% discipline))

    demographics_soc <- demographics[sample, ]
    scholars_sel <- lapply(scholars, "[", sample)

    # step 2
    ids <- demographics_soc$au_id
    nwaves <- length(waves)
    nets <- array(0, dim = c(nwaves, length(ids), length(ids)), dimnames = list(wave = 1:nwaves, ids,
        ids))
    dimnames(nets)

    # step 3
    df_works <- tibble(works_id = unlist(lapply(scholars_sel$work, function(l) l$id)), works_author = unlist(lapply(scholars_sel$work,
        function(l) l$author), recursive = FALSE), works_year = unlist(lapply(scholars_sel$work, function(l) l$publication_year),
        recursive = FALSE))

    df_works <- df_works[!duplicated(df_works), ]

    # step 4
    if (type == "first") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                ego <- df_works_w$works_author[i][[1]]$au_id[1]
                alters <- df_works_w$works_author[i][[1]]$au_id[-1]
                if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
                  nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
                }
            }
        }
    }

    if (type == "last") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                ego <- rev(df_works_w$works_author[i][[1]]$au_id)[1]
                alters <- rev(df_works_w$works_author[i][[1]]$au_id)[-1]
                if (sum(ids %in% ego) > 0 & sum(ids %in% alters) > 0) {
                  nets[j, which(ids %in% ego), which(ids %in% alters)] <- 1
                }
            }
        }
    }

    if (type == "all") {
        for (j in 1:nwaves) {
            df_works_w <- df_works[df_works$works_year >= waves[[j]][1] & df_works$works_year <= waves[[j]][2],
                ]
            for (i in 1:nrow(df_works_w)) {
                egos <- df_works_w$works_author[i][[1]]$au_id
                if (sum(ids %in% egos) > 0) {
                  nets[j, which(ids %in% egos), which(ids %in% egos)] <- 1
                }
            }
        }
    }
    output <- list()
    output$data <- scholars_sel
    output$nets <- nets
    return(output)
}
```


Example in class from 10 October
Siena dependent variable 

```{r}
# Step 1: load data
library(RSiena)
?sienaDependent #look for examples - script for how to run stuff 


# then check waves available
s501 #network at timepoint 1
s502 #network at timepoint 2
s503 #network at timepoint 


mynet1 <- sienaDependent(array(c(s501, s502, s503), dim=c(50, 50, 3)))
mybeh <- sienaDependent(s50a, type="behavior")

 # look for smoking and drinking behaviors

smoke <- s50s #time varying covariate. Time constant covar - varCovar
smoke <- varCovar(s50s)

mydata_example <- sienaDataCreate(mynet1, mybeh, smoke)



# Step 2: look at data
#print report on data reports

print01Report(mydata_example)


# Step 3: add effects 
myeff_example <- getEffects(mydata_example)
myeff_example



myeff_example <- includeEffects(myeff_example, unequalX, name="mynet1", interaction1 = "smoke")
myeff_example


#make algorithm to then estimate it
algo_ex <- sienaAlgorithmCreate(projname = "test_ex")
answer_ex <- siena07(algo_ex, data=mydata_example, effects=myeff_example, returnDeps = TRUE)

answer_ex


#does it change if dependent X 
myeff_example <- includeEffects(myeff_example, unequalX, name="mynet1", interaction1 = "mybeh") #name = network dependent variable, interaction 1 is covariate (smoking) - if have multiple netowrks, then could have unequalX with respect to X. We replaced smoke with my behavior, and rerun the siena model. This is an algorithm, can ask RSiena to reference previous result with 'prevans=ANS' after returnDepts = TRUE
answer_ex <- siena07(algo_ex, data=mydata_example, effects=myeff_example, returnDeps = TRUE)

answer_ex
# looking at results, mybeh = drinking -- ego less likely to spend time with people who drink more or less than them. ego also less likely to spend time with people who smoke more or less, to a lesser extent. Can use unequalX for dependent variables and covariates. key difference in estimation procedure: 
# we are also modeling behavioral results - behavior changing over time, simulated via ministep logic (one step up or down). The crucial difference: during the algorithm 
#If count statistic for smoke, then between time point 1 and 2 we use value from timepoint 1. between 2-3, use time point 2. But within the variable, the ego level behavior is changing. is not using the value as observed at t1 - it is using the value as is currently simulated. 





#could exclude variable to then include as covariate effect. 

mybeh <- sienaDependent(s50a, type="behavior") #defined as dependent variable 
drinking <- varCovar(s50a) #defined as covariate variable 

#update data object: 
mydata_example <- sienaDataCreate(mynet1, mybeh, drinking, smoke)


#and effects object: 
myeff_example <- includeEffects(myeff_example, unequalX, name="mynet1", interaction1 = "smoke")

myeff_example <- includeEffects(myeff_example, unequalX, name="mynet1", interaction1 = "drinking")

algo_ex2 <- sienaAlgorithmCreate(projname = "test_ex")

answer_ex <- siena07(algo_ex2, data=mydata_example, effects=myeff_example, returnDeps = TRUE)
answer_ex

#then the network dynamics change 
# interpretation of the rate constant - the average amount of ministeps each ego is making between the two observations

#behavioral dynamics: 
# the rate constant: the number of steps in the behavioral aspect 

#statistic for linear shape - linear shape effect - the value of the *centered* behavioral variable (average in the total sample). 
# if positive, try to increase behavior. effect is z1 (zed score), with the options of 1, 0, or -1. behavior increases because positively evaluates 1 more than 0, more than -1. At some point, though, behavior will stop increasing - that is z2 (squared!). so, parameter moderates itself with time?? 

# these are the minimum effects to include, because it predicts mean value observe in dataset. 2 parameters - estimate mean behavior. based on model results, what is the mean value. 

# behavioral variables in dataset - everything observed that changes. ex., position (functie), citations, interdisciplinarity score. Problem: is time window large enought to see change? and over time, behavioral variable is only changing - which then must be calculated as continuous depended variable. 


```


